# Script for the paper "Identifying global hotspots of agricultural expansion into non-forest ecosystems"

library(terra)
library(data.table)
library(exactextractr)

rm(list=ls(all=TRUE))
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
gc(T, verbose = F)

year.t1 <- 2000
year.t2 <- 2010
year.t3 <- 2020

# Land cover datasets divides global land cover into small tiles
# Read the names of all the small tiles
tile.names <- list.files("2000/tifs/",pattern = ".tif", full.names = F )

# Paths of maps of admin boundaries, protected areas and key biodiversity areas.
# Please download from the links provided in README
admin.file <- "path of map of admin boundaries"
pa.file <- "path of map of protected areas"
kba.file <- "path of map of key biodiversity areas"

# Create directories for the results at each level
dir.create(paste0("admin_level_transitions"), showWarnings = F)
dir.create(paste0("pa_level_transitions"), showWarnings = F)
dir.create(paste0("kba_level_transitions"), showWarnings = F)

# For GlobeLand30, define a function to reclassify some pixel values to smaller values to reduce raster size in later calculation
# m <- c(0, NA,
#        100, 2,
#        101, 3,
#        102, 4,
#        104, 5,
#        105, 6,
#        255, 1)
# 
# rclmat <- matrix(m, ncol=2, byrow=TRUE)
# 
# dtLookup <- data.table(
#   old = c(0, 2, 3, 4, 5, 6, 1),
#   new = c(NA, 100, 101, 102, 104, 105, 255)
# )
# 
# dt.replaceValueUsingLookup <- function(dt, col, dtLookup) {
#   dt[
#     dtLookup,
#     on = setNames("old", col),
#     (col) := new
#   ]
# }


# For each small tile, calculate conversion within admin boundaries/protected areas/key biodiversity areas
for (tile.name in tile.names){
  print(tile.name)
  gc(T, verbose = F)
  
  # Import land cover rasters for 2000, 2010 and 2020, conduct reclassification, and conduct resampling (ensure consistent resolution)
  map.t1 <- rast(paste0(year.t1, "/tifs/", tile.name))
  map.t2 <- rast(paste0(year.t2, "/tifs/", tile.name))
  map.t3 <- rast(paste0(year.t3, "/tifs/", tile.name))
  
  # For GlobeLand30
  # map.t1 <- classify(map.t1, rclmat)
  # map.t2 <- classify(map.t2, rclmat)
  # map.t3 <- classify(map.t3, rclmat)
  
  if (!(res(map.t1) == res(map.t2))[1] | !(res(map.t1) == res(map.t2))[2]){
    map.t1 <- project(map.t1, map.t2, "near")
  }
  
  if (!(res(map.t3) == res(map.t2))[1] | !(res(map.t3) == res(map.t2))[2]){
    map.t3 <- project(map.t3, map.t2, "near")
  }
  
  # Import vectors of admin boundaries,protected areas,key biodiversity areas
  # cut them to the extent of the land cover rasters
  # and project them to the coordinate of the land cover rasters
  lon.lat.crs <- "EPSG:4326"
  
  tile.shape1 <- vect(admin.file, extent = ext(project(vect(ext(map.t1), crs = crs(map.t1)), lon.lat.crs)))
  tile.shape2 <- vect(pa.file, extent = ext(project(vect(ext(map.t1), crs = crs(map.t1)), lon.lat.crs)))
  tile.shape3 <- vect(kba.file, extent = ext(project(vect(ext(map.t1), crs = crs(map.t1)), lon.lat.crs)))

  tile.shape1 <- project(tile.shape1, crs(map.t1))
  tile.shape2 <- project(tile.shape2, crs(map.t1))
  tile.shape3 <- project(tile.shape3, crs(map.t1))

  # If the vectors are not null, mosaic land cover rasters in 2000,2010 and 2020 into a single raster
  # and then calculate conversion within admin boundaries/protected areas/key biodiversity areas
  if (length(tile.shape1) > 0 || length(tile.shape2) > 0 || length(tile.shape3) > 0) {
    
    # Map.t1 and Map.t2 are multiplied by different values and then added to Map.t3
    # in order to combine different land cover codes from layers for three years into a single code in a single layer, simplifying further analysis. 
    # For example, if a pixel is classified as grassland (code 30) in 2000 and as cropland (code 10) in both 2010 and 2020, its new code would be calculated as: 30*10000+10*100+10=301010
    # This encoding allows the land cover classification for each year to be easily extracted later.
    
    map.t1 <- map.t1 * 10000
    map.t2 <- map.t2 * 100
    
    transitions <- mosaic(map.t1, map.t2, map.t3,  fun = "sum")

    rm(map.t1, map.t2, map.t3)
    gc(T, verbose = F)  
    
    if(length(tile.shape1) > 0){
     
      # Calculate conversion within admin boundaries
      curr.output1 <- exact_extract(transitions,sf::st_as_sf(tile.shape1),
                                    coverage_area = T, progress = F)
      
      # Turn into one data.table and define names of columns
      curr.output1 <- rbindlist(curr.output1, idcol = T)
      setnames(curr.output1, c("admin.id", "transition","km2"))

      # Convert from m2 to km2 and sum across admin units and transition
      curr.output1 <- curr.output1[,.(km2 = sum(km2) / 1000 / 1000), by = c("admin.id", "transition")]
      curr.output1 <- curr.output1[!is.na(transition)]
      
      # Get the unique ids (UID) from the admin boundary map to match
      # For each polygons in the vectors, we have already assigned a unique id in advance
      id.match <- as.data.table(tile.shape1)[, .(UID)][, admin.id := .I]
      
      # Break up the values of the transition column into separate columns that represent land covers in each year
      curr.output1[,land.use.t1 := floor(transition/10000)]
      curr.output1[,land.use.t2 := floor(transition/100) %% 100]
      curr.output1[,land.use.t3 := transition %% 100]
      
      # Align with admin unit UIDs and remove unnecessary columns
      curr.output1 <- merge(curr.output1, id.match, by = "admin.id")
      curr.output1[, c("admin.id","transition") := NULL]
      
      # # for Globeland30
      # dt.replaceValueUsingLookup(curr.output1, "land.use.t1", dtLookup)
      # dt.replaceValueUsingLookup(curr.output1, "land.use.t2", dtLookup)
      # dt.replaceValueUsingLookup(curr.output1, "land.use.t3", dtLookup)
      
      # save the output for each small tile
      saveRDS(curr.output1, paste0("admin_level_transitions/transitions_", tile.name, ".RDS"))
      
      rm(tile.shape1, curr.output1, id.match)
      gc(T, verbose = F) 
      
    }
    
    # Below are the same procedures for protected areas, key biodiversity areas
    if(length(tile.shape2) > 0){
      curr.output2 <- exact_extract(transitions,sf::st_as_sf(tile.shape2),
                                    coverage_area = T, progress = F)
      
      curr.output2 <- rbindlist(curr.output2, idcol = T)
      setnames(curr.output2, c("pa.id", "transition","km2"))

      curr.output2 <- curr.output2[,.(km2 = sum(km2) / 1000 / 1000), by = c("pa.id", "transition")]
      curr.output2 <- curr.output2[!is.na(transition)]
      
      id.match <- as.data.table(tile.shape2)[, .(UID)][, pa.id := .I]
      
      curr.output2[,land.use.t1 := floor(transition/10000)]
      curr.output2[,land.use.t2 := floor(transition/100) %% 100]
      curr.output2[,land.use.t3 := transition %% 100]
      
      curr.output2 <- merge(curr.output2, id.match, by = "pa.id")
      curr.output2[, c("pa.id","transition") := NULL]
      
      # dt.replaceValueUsingLookup(curr.output2, "land.use.t1", dtLookup)
      # dt.replaceValueUsingLookup(curr.output2, "land.use.t2", dtLookup)
      # dt.replaceValueUsingLookup(curr.output2, "land.use.t3", dtLookup)
      
      saveRDS(curr.output2, paste0("pa_level_transitions/transitions_", tile.name, ".RDS"))
      
      rm(tile.shape2, curr.output2, id.match)
      gc(T, verbose = F) 
    }
    
    if(length(tile.shape3) > 0){
      curr.output3 <- exact_extract(transitions,sf::st_as_sf(tile.shape3),
                                    coverage_area = T, progress = F)
      
      curr.output3 <- rbindlist(curr.output3, idcol = T)
      setnames(curr.output3, c("kba.id", "transition","km2"))

      curr.output3 <- curr.output3[,.(km2 = sum(km2) / 1000 / 1000), by = c("kba.id", "transition")]
      curr.output3 <- curr.output3[!is.na(transition)]
      
      id.match <- as.data.table(tile.shape3)[, .(UID)][, kba.id := .I]
      
      curr.output3[,land.use.t1 := floor(transition/10000)]
      curr.output3[,land.use.t2 := floor(transition/100) %% 100]
      curr.output3[,land.use.t3 := transition %% 100]
      
      curr.output3 <- merge(curr.output3, id.match, by = "kba.id")
      curr.output3[, c("kba.id","transition") := NULL]
      
      # dt.replaceValueUsingLookup(curr.output3, "land.use.t1", dtLookup)
      # dt.replaceValueUsingLookup(curr.output3, "land.use.t2", dtLookup)
      # dt.replaceValueUsingLookup(curr.output3, "land.use.t3", dtLookup)
      
      saveRDS(curr.output3, paste0("kba_level_transitions/transitions_", tile.name, ".RDS"))
      
      rm(tile.shape3, curr.output3, id.match)
      gc(T, verbose = F) 
    }
    
    
  }
}


# Integrate results for small tiles into a single one
# define a funtion to read all(RDS) files in bulk and join them
do.call_rbind_fread <- function(path, pattern) {
  files = list.files(path, pattern, full.names = TRUE)
  do.call(rbind, lapply(files, function(x) readRDS(x)))
}

dir.create("admin_level_transitions/single_files", showWarnings = F)
dir.create("pa_level_transitions/single_files", showWarnings = F)
dir.create("kba_level_transitions/single_files", showWarnings = F)

# integrate into one large file with the function and save data
change.data1 <- do.call_rbind_fread("admin_level_transitions/", "*.RDS")
change.data1 <- change.data1[,.(km2 = sum(km2)), by = c("UID","land.use.t1", "land.use.t2", "land.use.t3")]
setnames(change.data1, c("land.use.t1", "land.use.t2", "land.use.t3"), c("lc.2000","lc.2010","lc.2020"))
saveRDS(change.data1, "admin_level_transitions/single_files/transitions_2000_2010_2020_admin.rds")
gc(T, verbose = F) 

change.data2 <- do.call_rbind_fread("pa_level_transitions/", "*.RDS")
change.data2 <- change.data2[,.(km2 = sum(km2)), by = c("UID","land.use.t1", "land.use.t2", "land.use.t3")]
setnames(change.data2, c("land.use.t1", "land.use.t2", "land.use.t3"), c("lc.2000","lc.2010","lc.2020"))
saveRDS(change.data2, "pa_level_transitions/single_files/transitions_2000_2010_2020_pa.rds")
gc(T, verbose = F)

change.data3 <- do.call_rbind_fread("kba_level_transitions/", "*.RDS")
change.data3 <- change.data3[,.(km2 = sum(km2)), by = c("UID","land.use.t1", "land.use.t2", "land.use.t3")]
setnames(change.data3, c("land.use.t1", "land.use.t2", "land.use.t3"), c("lc.2000","lc.2010","lc.2020"))
saveRDS(change.data3, "kba_level_transitions/single_files/transitions_2000_2010_2020_kba.rds")
gc(T, verbose = F)


# Filter the conversion concerned in this study from the above RDS files
# Below is an example for country level results

crop.code <- 7                  # Reclassified pixel value for cultivated land in GLCLUC
land.code <- c(2,3,4)      # Reclassified pixel values for forestland, grassland & shrubland and non-forested wetland

# Match unique id with country names
admin.info <- as.data.table(vect(admin.file))[,.(UID,NAME_0)]
transition.data <- readRDS("admin_level_transitions/single_files/transitions_2000_2010_2020_admin.rds")
transition.data <- merge(transition.data, admin.info,by = "UID")

# Calculate conversion and reversion
# For the demo, there is no cropland in these areas, so there is no conversion detected.
transition.tot <- data.table()
for (lu.check in land.code) {
  conversion.2000.2010 <- transition.data[lc.2000 == lu.check & lc.2010 == crop.code,.(conversion.2000.2010 = sum(km2,na.rm = T)),by=c("lc.2000","NAME_0")]
  conversion.2010.2020 <- transition.data[lc.2010 == lu.check & lc.2020 == crop.code,.(conversion.2010.2020 = sum(km2,na.rm = T)),by=c("lc.2010","NAME_0")]
  setnames(conversion.2000.2010,"lc.2000","lc.code")
  setnames(conversion.2010.2020,"lc.2010","lc.code")
  
  reversion.2000.2010 <- transition.data[lc.2000 == crop.code & lc.2010 == lu.check,.(reversion.2000.2010 = sum(km2,na.rm = T)),by=c("lc.2010","NAME_0")]
  reversion.2010.2020 <- transition.data[lc.2010 == crop.code & lc.2020 == lu.check,.(reversion.2010.2020 = sum(km2,na.rm = T)),by=c("lc.2020","NAME_0")]
  setnames(reversion.2000.2010,"lc.2010","lc.code")
  setnames(reversion.2010.2020,"lc.2020","lc.code")
  
  curr.transition.tot <- merge(conversion.2000.2010,conversion.2010.2020,by = c("lc.code","NAME_0"),all=T)
  curr.transition.tot <- merge(curr.transition.tot,reversion.2000.2010,by = c("lc.code","NAME_0"),all=T)
  curr.transition.tot <- merge(curr.transition.tot,reversion.2010.2020,by = c("lc.code","NAME_0"),all=T)
  
  transition.tot <- rbind(transition.tot,curr.transition.tot)
}

openxlsx::write.xlsx(transition.tot, "admin_level_transitions/single_files/transitions_2000_2010_2020_admin.xlsx")

